** Data reading and variable selection from raw data
** China Family Panel Studies 2010 (wave 1)


** 01. Reading data **
** Resets the session, converts adult.dta from GB18030 to Unicode, then
** loads the file and writes it back under the same name.

// Close any log that is still open; capture suppresses the error if none is.
capture log close
clear all
set more off

// Placeholder: fill in the folder that holds adult.dta.
cd /*insert your work directory here*/

// Encoding conversion: scan the file, declare the source encoding, translate.
// The invalid option lets the translation go ahead even if some characters
// cannot be converted.
unicode analyze adult.dta
unicode encoding set "GB18030"
unicode translate adult.dta, invalid

// Load the translated file and save it again over itself.
use "adult.dta"
save "adult.dta", replace


** 02. Constructing year and country variables **

// Constant survey-year column: every record gets 2010.
generate year = 2010
label variable year "survey year"

// Constant country column: China is 156 (see "ISO Country Codes.pdf").
generate country = 156
label variable country "ISO country code"


** 03. ID variables **
// pid and fid are not created here; they must already be in the loaded data.
// This section only attaches variable labels to them.

label variable pid "person id"
label variable fid "family id"


** 04. Basic Demographics (Sex and Age/birth year) **

// Sex: numeric copy of the raw gender variable with its own value label.
generate sex = gender
label variable sex "sex"
label define sex 1 "male" 2 "female"
label values sex sex
// NOTE(review): only codes 1 and 2 receive a label. Confirm that the raw
// gender variable is coded 1/2; any other code would display unlabelled.

// Age: numeric copy of qa1age.
generate age = qa1age
label variable age "age"

// Birth year: survey year minus age.
generate birthyr = year - age
label variable birthyr "year of birth"


** 05. Siblings **
** Sibling counts are built from the roster items qb301_a_1 ... qb301_a_15.
** As used below: codes 1 and 3 count as brothers, codes 2 and 4 as sisters,
** and codes 1 and 2 alone feed the "older" counts.  A roster code of -2 or -1
** sets every count for that respondent to missing, and it stays missing
** because missing + 1 is still missing.

generate nbro = 0
generate nsis = 0
generate nolderbro = 0
generate noldersis = 0

forvalues k = 1/15 {
	local slot qb301_a_`k'

	// A -2/-1 entry in this slot invalidates all four counters.
	foreach cnt in nbro nsis nolderbro noldersis {
		replace `cnt' = . if inlist(`slot', -2, -1)
	}

	// Otherwise add the sibling in this slot to the matching counters.
	replace nbro = nbro + 1 if inlist(`slot', 1, 3)
	replace nsis = nsis + 1 if inlist(`slot', 2, 4)
	replace nolderbro = nolderbro + 1 if `slot' == 1
	replace noldersis = noldersis + 1 if `slot' == 2
}

label variable nbro "number of brothers"
label variable nsis "number of sisters"

// One value label (named nbro) is shared by the brother and sister counts.
label define nbro 99 "don't know/no answer"
label values nbro nsis nbro

label variable nolderbro "number of older brothers"
label variable noldersis "number of older sisters"

// Sum of older brothers and older sisters (0 when none were counted).
generate birthorder = nolderbro + noldersis

// Sibling total as reported directly by the respondent in qb1.
generate nsib1 = qb1
label variable nsib1 "reported by respondent: number of siblings"
label define nsib1 -8 "not applicable" -2 "refused to answer" -1 "don't know"
label values nsib1 nsib1

// Sibling total derived from the roster counts.
generate nsibs = nbro + nsis
label variable nsibs "number of siblings"


** 06. Own education **

// Keep the raw variable under the name educ_chinese, then rebuild educ as a
// plain numeric copy that carries the English value label defined below.
rename educ educ_chinese
generate educ = educ_chinese
label variable educ "highest education of respondent"

label define educc ///
	1 "no education" ///
	2 "primary school_for adults, literate school" ///
	3 "primary school_ordinary" ///
	4 "secondary school_for adults" ///
	5 "secondary school_vocational" ///
	6 "secondary school_ordinary" ///
	7 "high school_technical school" ///
	8 "high school_for adults" ///
	9 "secondary vocational_for adults" ///
	10 "high school_ordinary" ///
	11 "polytech school_for adults" ///
	12 "polytech school_ordinary" ///
	13 "college_for adults" ///
	14 "college_ordinary" ///
	15 "graduate school" ///
	16 "PhD"
label values educ educc

// eduy is not created here; it must already be in the data and is only
// given a variable label.
label variable eduy "number of years spent in education"


** 07. Parents' education: Father and/or Mother **

// Keep each raw variable under a *_chinese name, then rebuild feduc/meduc as
// plain numeric copies that carry the English value label defined below.
rename feduc feduc_chinese
ge feduc=feduc_chinese
lab var feduc "father's highest level of education"

rename meduc meduc_chinese
ge meduc=meduc_chinese
lab var meduc "mother's highest level of education"

lab def feducc 1 "illiterate/half illiterate" 2 "primary" 3 "secondary school" ///
4 "high school" 5 "technical college" 6 "university" 7 "graduate" 8 "PhD"

// Attach the label defined just above to both parents' variables.
// The name must match the definition (feducc): Stata accepts an undefined
// label name without error, which would leave both variables unlabelled.
lab val feduc meduc feducc

//number of years spent in education
// feduy and meduy are not created here; they must already be in the data
// and are only given variable labels.
lab var feduy "father's number of years father spent in education"
lab var meduy "mother's number of years mother spent in education"


** 08. Own occupation **
** Each variable below is a numeric copy of a raw survey item with an English
** value label attached.  Label entries are listed in numeric order.

// Employment status (from qg3)
generate workstat = qg3
label variable workstat "if currently have a job"
label define workstat -8 "not applicable" 0 "no" 1 "yes"
label values workstat workstat

// Type of employment (from qg303)
generate tyemp = qg303
label variable tyemp "type of employment"
label define tyemp ///
	-8 "not applicable" ///
	-2 "refused to answer" ///
	-1 "don't know" ///
	1 "self-employed" ///
	3 "work for a company/organisation" ///
	5 "argricultural work"
label values tyemp tyemp

// Industry (from qh406code)
generate ind = qh406code
label variable ind "industry work for"
label define ind ///
	-8 "not applicable" ///
	-2 "refused to answer" ///
	-1 "don't know" ///
	1 "Agriculture, forestry, animal husbandry and fishery" ///
	2 "Mining" ///
	3 "Manufacturing" ///
	4 "Production and supply of electricity, gas and water" ///
	5 "Construction" ///
	6 "Transportation, storage, and postal service" ///
	7 "Information transmission, computer service" ///
	8 "Wholesale and retail" ///
	9 "Hotel and catering service" ///
	10 "Finance" ///
	11 "Real estate" ///
	12 "Rental and commercial service" ///
	13 "Scientific research, technical service and geological prospecting" ///
	15 "Residential and other service industry" ///
	16 "Education" ///
	17 "Health, social security and public welfare" ///
	19 "Public administration and social organization" ///
	21 "Other industries"
label values ind ind

// Type of work unit (from qg305)
generate unit = qg305
label variable unit "type of work unit"
label define unit ///
	-8 "not applicable" ///
	-2 "refused to answer" ///
	-1 "don't know" ///
	1 "Government/Party/People’s organization/Military" ///
	2 "State-owned/Collectivelyowned public institution/Research" ///
	3 "State-owned/Statecontrolled enterprise" ///
	4 "Collectively-owned" ///
	5 "Joint stock cooperative enterprise" ///
	6 "Limited liability company/Company limited by shares" ///
	7 "Private enterprise" ///
	8 "Enterprise invested in by Hong Kong/Macao/Taiwan" ///
	9 "Company invested by foreign captial" ///
	10 "Rural family business" ///
	11 "Individually owned business (getihu)" ///
	12 "Private non-profit" ///
	13 "Association/Guild/Foundation/Social organization" ///
	14 "Residential community committee/Village committee/Autonomous organization" ///
	17 "Unable to identify" ///
	77 "Other"
label values unit unit

// Occupation code (from qg307code); only the negative special codes are labelled.
generate occ_code = qg307code
label variable occ_code "respondent's occupation (non-argricultural)"
label define occ_code -8 "not applicable" -2 "refused to answer" -1 "don't know"
label values occ_code occ_code


** 09. Parents' occupation **
// Numeric copies of the raw parental occupation codes; no value labels are
// attached in this section.

// Father (from tb5_code_a_f)
generate focc_code = tb5_code_a_f
label variable focc_code "father's occupation at the time of the survey_code"

// Mother (from tb5_code_a_m)
generate mocc_code = tb5_code_a_m
label variable mocc_code "mother's occupation at the time of the survey_code"


** 10. Tabulate the Identified Variables **
** Opens a text log, prints descriptive tables for every constructed variable,
** and closes the log again.

// Placeholder: the log file path goes in the comment slot.
// NOTE(review): while the placeholder is unfilled, the word "cd" on this line
// is what Stata reads as the log filename; presumably it is a leftover and
// should be removed once a real path is inserted -- confirm.
log using cd /*insert your work directory here*/, replace text

** Data reading and variable selection from raw data
** China Family Panel Studies 2010 (wave 1)

** Sex **
tabulate sex

** Age, Birth Year **
summarize age birthyr, detail

** Siblings **
summarize nsibs birthorder, detail

** R's Own Education **
tab1 educ eduy

** Parental Education **
tab1 feduc meduc feduy meduy

** R's Own Occupation **
tab1 workstat tyemp unit ind occ_code

** Parental Occupation **
tab1 focc_code mocc_code

log close

** 11. Keep the identified variables only **
// Everything not listed here is dropped from the data in memory, including
// fid, the raw source items and the separate brother/sister counts.

keep year country pid ///
	sex age birthyr ///
	nsibs birthorder ///
	educ eduy ///
	feduc meduc feduy meduy ///
	workstat tyemp unit ind occ_code ///
	focc_code mocc_code


** 12. Save the Data File **
// Placeholder: the output file path goes in the comment slot.

saveold /*insert your work directory here*/, replace



** 13. Homogenising education **
** Own Education **
// Rename the category and years variables, then map each category of
// educ_cat to a three-digit ISCED code.  Categories not listed below
// (including missing) leave educ_ISCED missing.
rename educ educ_cat
rename eduy educ_yrs

// 020 is stored as the number 20.
generate educ_ISCED = 020 if educ_cat == 1
replace educ_ISCED = 100 if inlist(educ_cat, 2, 3)
replace educ_ISCED = 244 if inlist(educ_cat, 4, 5, 6)
replace educ_ISCED = 344 if inlist(educ_cat, 8, 10)
replace educ_ISCED = 354 if inlist(educ_cat, 7, 9)
replace educ_ISCED = 554 if inlist(educ_cat, 11, 12)
// NOTE(review): categories 13/14 are mapped to 667 here, while the parental
// mapping later in this file uses 665 for "university" -- confirm which code
// is intended.
replace educ_ISCED = 667 if inlist(educ_cat, 13, 14)
replace educ_ISCED = 767 if educ_cat == 15
replace educ_ISCED = 864 if educ_cat == 16
label variable educ_ISCED "respondent highest education in ISCED code"

** Parents Education **
// Renames the parental education variables and maps each parent's category
// to a three-digit ISCED code.  Categories not listed below (including
// missing) leave the ISCED variable missing.

//father's education is actually father's
// The flag is set to 1 for every record.
ge faeduc_flag=1 

rename feduc faeduc_cat
rename feduy faeduc_yrs
rename meduc maeduc_cat
rename meduy maeduc_yrs

// Father: the variable must be created with ge(nerate) before the replace
// lines can fill in the remaining categories.  020 is stored as the number 20.
ge faeduc_ISCED=020 if faeduc_cat==1
replace faeduc_ISCED=100 if faeduc_cat==2
replace faeduc_ISCED=244 if faeduc_cat==3
replace faeduc_ISCED=344 if faeduc_cat==4
replace faeduc_ISCED=554 if faeduc_cat==5
replace faeduc_ISCED=665 if faeduc_cat==6
replace faeduc_ISCED=767 if faeduc_cat==7
replace faeduc_ISCED=864 if faeduc_cat==8
lab var faeduc_ISCED "father highest education in ISCED code"

// Mother: same mapping as for the father.
ge maeduc_ISCED=020 if maeduc_cat==1
replace maeduc_ISCED=100 if maeduc_cat==2
replace maeduc_ISCED=244 if maeduc_cat==3
replace maeduc_ISCED=344 if maeduc_cat==4
replace maeduc_ISCED=554 if maeduc_cat==5
replace maeduc_ISCED=665 if maeduc_cat==6
replace maeduc_ISCED=767 if maeduc_cat==7
replace maeduc_ISCED=864 if maeduc_cat==8
lab var maeduc_ISCED "mother highest education in ISCED code"

** 14. Homogenising sibling **

// Cutoff flag: the same value, 99, is stored for every record.
generate nsibs_flag = 99
label variable nsibs_flag "cutoff of total number of siblings"

// Recode the value 999 in nsibs to missing.
replace nsibs = . if nsibs == 999

// Number of brothers/sisters not available.


** 15. Tab Education and Sibling Variables **
// One-way frequency tables for the demographic, education and sibling
// variables as they stand after renaming and recoding.
tab1 sex age birthyr
tab1 educ_cat educ_yrs ///
	faeduc_cat faeduc_yrs ///
	maeduc_cat maeduc_yrs ///
	faeduc_flag
tab1 nsibs nsibs_flag


** 16. Save the Data File **
// Placeholder: the output file path goes in the comment slot.

saveold /*insert your work directory here*/, replace

